In [1]:
pip install neo4j
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: neo4j in c:\users\user\appdata\roaming\python\python313\site-packages (6.1.0)
Requirement already satisfied: pytz in c:\programdata\anaconda3\lib\site-packages (from neo4j) (2024.1)
Note: you may need to restart the kernel to use updated packages.
In [2]:
pip install py2neo
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: py2neo in c:\users\user\appdata\roaming\python\python313\site-packages (2021.2.4)
Requirement already satisfied: certifi in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2025.4.26)
Requirement already satisfied: interchange~=2021.0.4 in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (2021.0.4)
Requirement already satisfied: monotonic in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (1.6)
Requirement already satisfied: packaging in c:\programdata\anaconda3\lib\site-packages (from py2neo) (24.2)
Requirement already satisfied: pansi>=2020.7.3 in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (2024.11.0)
Requirement already satisfied: pygments>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2.19.1)
Requirement already satisfied: six>=1.15.0 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (1.17.0)
Requirement already satisfied: urllib3 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2.3.0)
Requirement already satisfied: pytz in c:\programdata\anaconda3\lib\site-packages (from interchange~=2021.0.4->py2neo) (2024.1)
Requirement already satisfied: pillow in c:\programdata\anaconda3\lib\site-packages (from pansi>=2020.7.3->py2neo) (11.1.0)
Note: you may need to restart the kernel to use updated packages.

Import Libraries

In [3]:
from py2neo import Graph
import pandas as pd
from neo4j import GraphDatabase 
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt

Connect to Neo4j

In [5]:
# Connect to Neo4j over Bolt, specifying your database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "Neo4j@123"), name="usroadnetworkdb")
if graph:
    print("Connected to Neo4j")
else:
    print("Not Connected")
Connected to Neo4j

4. Analyze the centrality of intersections using the Betweenness Centrality algorithm()

Top 20 Intersections by Betweenness Centrality

In [6]:
# Query betweenness scores
query = """
CALL gds.betweenness.stream('usRoadGraph', { samplingSize: 1000 })
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS intersectionId, score
ORDER BY score DESC
LIMIT 20;
"""
df = pd.DataFrame(graph.run(query).data())
print(df)
    intersectionId         score
0             9233  1.659771e+07
1             1939  1.655155e+07
2             1917  1.653462e+07
3             1921  1.649183e+07
4             1920  1.649179e+07
5            10666  1.645009e+07
6             1916  1.642931e+07
7             1906  1.642927e+07
8             8509  1.601949e+07
9             1903  1.599096e+07
10            1902  1.599087e+07
11           10767  1.581709e+07
12           10769  1.578116e+07
13            8978  1.555402e+07
14            8979  1.555399e+07
15            8980  1.555395e+07
16           10875  1.225606e+07
17           11009  1.204013e+07
18           10993  1.202951e+07
19           10880  1.201712e+07

Pie chart to visuilze the Top 20 Intersections by Betweenness Centrality

In [7]:
plt.figure(figsize=(10,6))
plt.bar(df["intersectionId"].astype(str), df["score"])
plt.xlabel("Intersection ID")
plt.ylabel("Betweenness Centrality Score")
plt.title("Top 20 Intersections by Betweenness Centrality")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [30]:
# ================================
# Neo4j → Plotly (Dynamic) using py2neo Graph
# - Q5 Dashboard (Totals + Degree Distribution)
# - Q6 Degree Distribution Bar Chart
# - Q7 Top 10 Most Connected Intersections
# - Q8 Intersection Categories by Degree
#
# Assumption: you ALREADY have `graph` defined, e.g.:
# from py2neo import Graph
# graph = Graph("bolt://localhost:7687", auth=("neo4j","PASSWORD"), name="usroadnetworkdb")
# ================================

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# -------------------------------
# Cypher queries (Neo4j 5+ compatible)
# -------------------------------

# Total intersections
Q_TOTAL_INTERSECTIONS = """
MATCH (i:Intersection)
RETURN count(i) AS totalIntersections;
"""

# Total roads (UNIQUE physical roads, lecture-aligned)
Q_TOTAL_ROADS_UNIQUE = """
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road) AS totalRoads;
"""

# Degree distribution (for Q6 and dashboard)
Q_DEGREE_DISTRIBUTION = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree;
"""

# Top 10 most connected intersections (Q7)
Q_TOP10_DEGREE = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10;
"""

# Categories by degree (Q8)
Q_DEGREE_CATEGORIES = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH CASE
  WHEN degree <= 2 THEN 'Low Connectivity'
  WHEN degree <= 4 THEN 'Medium Connectivity'
  ELSE 'High Connectivity'
END AS category
RETURN category, count(*) AS numberOfIntersections
ORDER BY numberOfIntersections DESC;
"""

# Average degree (optional check)
Q_AVG_DEGREE = """
MATCH (i:Intersection)
RETURN avg(COUNT { (i)-[:ROAD]-() }) AS avgDegree;
"""

# -------------------------------
# Helpers
# -------------------------------
def scalar(query: str):
    """Return a single scalar value from a Cypher query."""
    return graph.run(query).evaluate()

def df(query: str) -> pd.DataFrame:
    """Return a DataFrame from a Cypher query."""
    return graph.run(query).to_data_frame()

# -------------------------------
# Fetch data dynamically
# -------------------------------
total_intersections = scalar(Q_TOTAL_INTERSECTIONS)
total_roads_unique = scalar(Q_TOTAL_ROADS_UNIQUE)
avg_degree = scalar(Q_AVG_DEGREE)  # optional (e.g., ~2.8)

degree_dist_df = df(Q_DEGREE_DISTRIBUTION)
top10_df = df(Q_TOP10_DEGREE)
categories_df = df(Q_DEGREE_CATEGORIES)

# ================================
# Q5: SIMPLE DASHBOARD (Totals + Degree Distribution)
# ================================
dashboard = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "indicator"}, {"type": "indicator"}],
           [{"colspan": 2}, None]],
    subplot_titles=("Total Intersections", "Total Roads (Unique)", "Degree Distribution")
)

dashboard.add_trace(
    go.Indicator(mode="number", value=total_intersections),
    row=1, col=1
)

dashboard.add_trace(
    go.Indicator(mode="number", value=total_roads_unique),
    row=1, col=2
)

dashboard.add_trace(
    go.Bar(x=degree_dist_df["degree"], y=degree_dist_df["frequency"]),
    row=2, col=1
)

dashboard.update_layout(
    title="US Road Network Overview Dashboard",
    height=720,
    showlegend=False
)

dashboard.show()

# ================================
# Q6: DEGREE DISTRIBUTION BAR CHART (standalone)
# ================================
fig_q6 = px.bar(
    degree_dist_df,
    x="degree",
    y="frequency",
    labels={"degree": "Number of Roads (Degree)", "frequency": "Number of Intersections"},
    title="Q6: Degree Distribution of Intersections"
)
fig_q6.update_layout(xaxis=dict(tickmode="linear"))
fig_q6.show()

# ================================
# Q7: TOP 10 MOST CONNECTED INTERSECTIONS (standalone)
# ================================
fig_q7 = px.bar(
    top10_df,
    x="intersectionId",
    y="degree",
    labels={"intersectionId": "Intersection ID", "degree": "Number of Connected Roads"},
    title="Q7: Top 10 Most Connected Intersections"
)
fig_q7.update_layout(xaxis=dict(type="category"))
fig_q7.show()

# ================================
# Q8: INTERSECTION CATEGORIES BY DEGREE (standalone)
# ================================
fig_q8 = px.bar(
    categories_df,
    x="category",
    y="numberOfIntersections",
    labels={"category": "Connectivity Category", "numberOfIntersections": "Number of Intersections"},
    title="Q8: Intersection Categories by Degree"
)
fig_q8.show()

# ================================
# Optional: print quick sanity checks
# ================================
print("Total Intersections:", total_intersections)
print("Total Roads (Unique):", total_roads_unique)
print("Average Degree:", avg_degree)
Total Intersections: 87575
Total Roads (Unique): 121491
Average Degree: 2.774558949471861

6.Degree Distribution Bar Chart

In [31]:
# Cypher query (Degree Distribution)
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree
"""

# ----------------------------------
# Run query using graph (no session)
# ----------------------------------
df = graph.run(query).to_data_frame()

# ----------------------------------
# Plotly bar chart
# ----------------------------------
fig = px.bar(
    df,
    x="degree",
    y="frequency",
    labels={
        "degree": "Number of Roads (Degree)",
        "frequency": "Number of Intersections"
    },
    title="Degree Distribution of Intersections in the US Road Network"
)

fig.update_layout(
    xaxis=dict(tickmode="linear"),
    bargap=0.2
)

fig.show()

7. Top 10 Most Connected Intersections:

In [32]:
# Run Cypher query dynamically
# ----------------------------------
query = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10
"""

df = graph.run(query).to_data_frame()

# ----------------------------------
# Plotly bar chart
# ----------------------------------
fig = px.bar(
    df,
    x="intersectionId",
    y="degree",
    labels={
        "intersectionId": "Intersection ID",
        "degree": "Number of Connected Roads"
    },
    title="Top 10 Most Connected Intersections in the US Road Network"
)

fig.update_layout(
    xaxis=dict(type="category"),
    bargap=0.3
)

fig.show()

8. Intersection Categories by Degree

In [33]:
# Run query dynamically from db
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH
  CASE
    WHEN degree <= 2 THEN 'Low Connectivity'
    WHEN degree <= 4 THEN 'Medium Connectivity'
    ELSE 'High Connectivity'
  END AS category
RETURN category, count(*) AS numberOfIntersections
ORDER BY numberOfIntersections DESC
"""

df = graph.run(query).to_data_frame()

# Bar chart
fig = px.bar(
    df,
    x="category",
    y="numberOfIntersections",
    labels={
        "category": "Intersection Connectivity Category",
        "numberOfIntersections": "Number of Intersections"
    },
    title="Intersection Categories by Degree in the US Road Network"
)

fig.show()

Pie Chart: Road Network Composition

Bar Chart: Road Network Metrics

In [11]:
import plotly.graph_objects as go

# ---------------------------------
# Get values from Neo4j
# ---------------------------------

intersections = graph.run("""
MATCH (i:Intersection)
RETURN count(i)
""").evaluate()

roads = graph.run("""
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road)
""").evaluate()

# ---------------------------------
# Draw bar chart
# ---------------------------------

fig = go.Figure(
    data=[
        go.Bar(
            x=["Intersections", "Roads"],
            y=[intersections, roads],
            text=[intersections, roads],
            textposition="auto",
            marker_color=["steelblue", "darkorange"]
        )
    ]
)

fig.update_layout(
    title="US Road Network Metrics",
    xaxis_title="Metric",
    yaxis_title="Count"
)

fig.show()

6. Degree Distribution Bar Chart

In [38]:
# Query degree distribution
with driver.session(database="road-network-db") as session:
    results = session.run("""
        MATCH (i:Intersection)-[r]-()
        WITH i, count(r) AS degree
        RETURN degree, count(*) AS frequency
        ORDER BY degree
    """)
    df = pd.DataFrame([dict(record) for record in results])

print(df)  # Shows degree vs frequency table

# Plot degree distribution
fig = px.bar(df, x="degree", y="frequency",
             labels={"degree": "Node Degree (# of roads)", "frequency": "Number of Intersections"},
             title="Degree Distribution of Intersections")
fig.show()
    degree  frequency
0        2        258
1        4        236
2        6        238
3        8        312
4       10        289
..     ...        ...
70     144          2
71     146          2
72     148          1
73     150          1
74     158          1

[75 rows x 2 columns]

7. Top 10 Most Connected Intersections Visualization

In [37]:
# Query top 10 intersections and build DataFrame directly
with driver.session(database="road-network-db") as session:
    results = session.run("""
        MATCH (i:Intersection)-[r]-()
        WITH i, count(r) AS degree
        ORDER BY degree DESC
        LIMIT 10
        RETURN i.id AS intersection_id, degree
    """)
    df = pd.DataFrame([dict(record) for record in results])

print(df)  # Shows the top 10 intersections with their degree

# Plot bar chart
fig = px.bar(df, x="intersection_id", y="degree",
             labels={"intersection_id": "Intersection ID", "degree": "Degree (# of roads)"},
             title="Top 10 Most Connected Intersections")
fig.show()
   intersection_id  degree
0             2804     158
1             2803     150
2             3195     148
3             2807     146
4             2832     146
5             2799     144
6             3022     144
7             2806     140
8             1640     138
9             1627     138

8. Intersection Categories by Degree

In [36]:
# Query intersection categories and build DataFrame directly
with driver.session(database="road-network-db") as session:
    results = session.run("""
        MATCH (i:Intersection)-[r]-()
        WITH i, count(r) AS degree
        WITH CASE 
                WHEN degree <= 2 THEN 'Low'
                WHEN degree <= 4 THEN 'Medium'
                ELSE 'High'
             END AS category
        RETURN category, count(*) AS intersections
        ORDER BY category
    """)
    df = pd.DataFrame([dict(record) for record in results])

print(df)  # Shows category vs number of intersections

# Plot bar chart
fig = px.bar(df, x="category", y="intersections",
             labels={"category": "Connectivity Category", "intersections": "Number of Intersections"},
             title="Intersection Categories by Degree")
fig.show()
  category  intersections
0     High           9069
1      Low            258
2   Medium            236

9. Degree Distribution Bar Chart( Duplicated )

In [35]:
import pandas as pd

# Degree distribution query (Neo4j 5+ compatible)
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree
"""

# Run query using py2neo Graph (NO driver/session)
df = graph.run(query).to_data_frame()

# View results
df.head()
Out[35]:
degree frequency
0 1 2234
1 2 35486
2 3 29941
3 4 19652
4 5 227

Dashboard Summary

In [32]:
Q_TOTAL_INTERSECTIONS = """
MATCH (i:Intersection)
RETURN count(i) AS totalIntersections;
"""

Q_TOTAL_ROADS = """
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road) AS totalRoads;
"""

Q_DEGREE_DIST = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree;
"""

Q_TOP10 = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10;
"""

Q_CATEGORIES = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH CASE
  WHEN degree <= 2 THEN 'Low'
  WHEN degree <= 4 THEN 'Medium'
  ELSE 'High'
END AS category
RETURN category, count(*) AS count
ORDER BY count DESC;
"""

# -------------------------------
# Fetch data
# -------------------------------

total_intersections = graph.run(Q_TOTAL_INTERSECTIONS).evaluate()
total_roads = graph.run(Q_TOTAL_ROADS).evaluate()

degree_df = graph.run(Q_DEGREE_DIST).to_data_frame()
top10_df = graph.run(Q_TOP10).to_data_frame()
cat_df = graph.run(Q_CATEGORIES).to_data_frame()

# -------------------------------
# ONE COMBINED DASHBOARD
# -------------------------------

fig = make_subplots(
    rows=3, cols=2,
    specs=[
        [{"type": "indicator"}, {"type": "indicator"}],
        [{"colspan": 2}, None],
        [{"type": "bar"}, {"type": "bar"}]
    ],
    subplot_titles=(
        "Total Intersections",
        "Total Roads (Unique)",
        "Degree Distribution",
        "Top 10 Most Connected Intersections",
        "Intersection Categories by Degree"
    )
)

# Indicators
fig.add_trace(go.Indicator(mode="number", value=total_intersections), row=1, col=1)
fig.add_trace(go.Indicator(mode="number", value=total_roads), row=1, col=2)

# Degree distribution
fig.add_trace(
    go.Bar(x=degree_df["degree"], y=degree_df["frequency"]),
    row=2, col=1
)

# Top 10
fig.add_trace(
    go.Bar(x=top10_df["intersectionId"], y=top10_df["degree"]),
    row=3, col=1
)

# Categories
fig.add_trace(
    go.Bar(x=cat_df["category"], y=cat_df["count"]),
    row=3, col=2
)

fig.update_layout(
    height=900,
    title="US Road Network – Summary Visual Analytics Dashboard",
    showlegend=False
)

fig.show()
In [ ]: